/************************************   

DESCRIPTION: This program puts female beneficiaries into bins, or blocks, based
 on their propensity score for estimation of the blocking estimator. The blocks
 are determined by either the size of the block or whether the 'treated' and the 
 'control' group beneficiaries are statistically different from each other within the 
  block.
  
  Datasets used:
   (1) femLPManalysis2
  
  Datasets created:
   (1) femAllBlocks
   (2) femDiabBlocks
   (3) femeHypBlocks


*************************************/  

clear all
set more off
capture log close

set matsize 10000

global base "\\192.168.6.107\silstorage"
*global base "N:"
cd "$base\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Program\PropensityScore\Patients\ResponsetoReferee\OtherPropScoreMethods\Blocking"

global origData "$base\MedicareClaims-P045601-BE"
*Use data from LPM
global dataIn "$base\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Program\PropensityScore\Patients\ResponsetoReferee\OtherPropScoreMethods\LPM"
global dataOut "$base\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-Out"
global dpath "$base\MedicareClaims-P045601-BE\Work\ay_data"
global dataProp "$base\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Data-Out\PropScore\Patients"
global logs "$base\MedicareClaims-P045601-BE\Work\hosp_retro\health_out\Logs\PropScore\Patients"
global dpath "$base\MedicareClaims-P045601-BE\Work\ay_data"
global skapath "$base\MedicareClaims-P045601-BE\Work\ska"
 
adopath +  "$base\SIL-Common\estout"
adopath +  "$base\SIL-Common\outreg2"
adopath +  "$base\SIL-Common\reghdfe-master/package"

log using ".\1b.BlockEstimation_FemaleCreate.log", replace

*Columnames program

program getColnames 
 quietly {
  matrix A = e(b)'
 
 local rownames : rowfullnames A
 local c : word count `rownames'
 local names : colfullnames A

 svmat A, names(col)

 gen rownames = ""
 forvalues i = 1/`c' {

  replace rownames = "`:word `i' of `rownames''" in `i' 
   } 

 }

end


program getRegStats
 quietly {
  matrix A = e(b)'
 
 local rownames : rowfullnames A
 local c : word count `rownames'
 local n = `c' + 1
 local rsq = `c' + 3

 replace beta1 =e(N) in `n' 
 replace beta1 =e(r2_p) in `rsq' 

 }

end

*This creates the blocking estimator blocks from the t-stats
*Want it dynamic because we'll keep splitting the blocks until t-stat is < 1.96

 program blocking
*Define arguments
  args centiles cnt
  *get the centile estimates conditional on treatment for each centile we specify (start with 4)
  centile EHAT if vmerger==1, centile(`centiles' )
  *the count of centile estimates we care about (start with 4)
  local wc = `cnt' -1 
 capture drop block  

 *this generates the block - start with the first one outside the loop
 gen block =1 if  EHAT < r(c_1)
  forval k = 1(1)`wc' {
   di "`k'"
   local kp1 = `k' +1
   di "`kp1'"
   if `k' < `wc' {
   replace block =`kp1' if  r(c_`k') <= EHAT & EHAT < r(c_`kp1')
   }
   if `k' >= `wc' {
   replace block =`kp1' if  r(c_`k') <= EHAT 
   }
}
 *Just a check to see all the observations have a block  
   tab block, miss
   
 *now create the Log odds ratio for calculation of the t within the block
  levelsof block, local(locblock)
   capture drop lor
   gen lor = log(EHAT/(1-EHAT))
   
 *construct the t-stat for differences in LOR of treated and untreated groups within each block
  *first get the relevant statistics for use in the t-stat
forval vm = 0(1)1 {
 foreach bk of local locblock {
    di `vm' " " `bk'
 summ lor if vmerger ==`vm' & block ==`bk' 
  scalar mn`vm'_`bk' = r(mean)
  scalar var`vm'_`bk' = r(Var)
  scalar ct`vm'_`bk' = r(N)
    }
  }
  *now calculate the t-stat
 foreach b of local locblock {
  scalar t`b' = (mn0_`b' - mn1_`b')/sqrt(var0_`b'/ct0_`b' + var1_`b'/ct1_`b' )
  di `b'
  di t`b'
 }
 
 end
 
 

**************** All *************************
 
 
 
use BENE_ID mxdiabetes_p_all ehat vmerger mxhypertension_p_all mxicd8_chronic_p_all using $dataIn\femLPManalysis2.dta
 *Drop these contradictions
 drop if mxhypertension_p_all==1 & mxicd8_chronic_p_all==0
  gen EHAT = ehat
  drop if EHAT ==.
   keep BENE_ID EHAT vmerger
  *Unique people
  egen maxvmerger = max(vmerger), by(BENE_ID)
  drop vmerger
  rename maxvmerger vmerger
  duplicates drop

*this is just a while variable.  
local wvar = 0 
*this is the primer. Imbens said most people have 5 blocks. We'll have many
local cen 20 40 60 80 

local looper = 0

*Doing while at least one t-stat is > 1.96. Currently just a loop. Change later
 while `wvar' < 1 { 
 local looper =`looper' + 1
 *Set to primer if first iteration 
 if `looper' ==1 {
 local cen `cen' 
 }
 *Set to split groups if relevant
 if `looper' > 1 {
 local cen `newcen'
 }

  local wc : word count `cen' 
  local wc = `wc' +1  

*Effectively drop these locals
  local newcen = .
  forval i=1(1)`wc' {
  local c_a`i' = .
  }

 *THE FOLLOWING LOOP LOOKS TO SEE IF THE BLOCK NEEDS TO BE SPLIT AT MEDIAN
 *Looping over current set of centiles. primer list has 4
   forval i=1(1)`wc' {
   local j = `i' -1
   blocking "`cen'" `wc' 
      
  *THIS gets the current and the previous cutoff values to take "median"
   *There is one more group than cutoff value. Dealing with that issue
   if `i' < `wc'  {
   local temp`i' `: word `i' of `cen''
   }
   *There is one more group than cutoff value. Dealing with that issue
   if `j' >=1  {
   local temp`j' `: word `j' of `cen''
   }

 *Finds the value that splits the first block if t-stat is big
 if `i' ==1 & abs(t`i') > 1.96 {
   local c_a`i' = (`temp`i'' ) / 2 
   di `c_a`i'' 
   }

  *Finds the value that splits all blocks other than first one if t-stat is big
  if `i' >1 & `i'<`wc' & abs(t`i') > 1.96 {
   local c_a`i' = (`temp`i'' - `temp`j'' ) / 2 + `temp`j'' 
   di `c_a`i''
   }

  *Finds the value that splits the last block if t-stat is big
   if `i'==`wc' & abs(t`i') > 1.96 {
   local c_a`i' = (100 - `temp`j'' ) / 2 + `temp`j'' 
   di `c_a`i''
   }

 *Creates the centile groups for construction of block
 *When we don't split a groups, the splitter becomes missing, need to remove it use the "not" local to take it out of the block name
 local not .
 local wvar = 1
 *This is a loop for all of the centiles
  forval k=1(1)`wc' {
 *The first centile
    if `k' ==1 {
	local temp`k' `: word `k' of `cen''
     local newcen `c_a`k'' `temp`k''
	 local newcen: list newcen- not
   }
*The middle centiles
   if `k' >1 & `k' != `wc' {
	local temp`k' `: word `k' of `cen''
    local newcen `newcen' `c_a`k'' `temp`k''
	 local newcen: list newcen- not
   }
*The last centile
    if `k' == `wc' {
    local newcen `newcen' `c_a`k'' 
	 local newcen: list newcen- not
   }
*Get the while variable
   local allblock = 1
   if abs(t`k') > 1.96 & ct1_`k' > 35 { 
     local allblock = 0
    }
	local wvar = `wvar' * `allblock'
   }
    di t`i' " " ct1_`i'
    di `c_a`i''
    di `temp`i'' " " `temp`j'' 
    di "`cen'"
    di "`newcen'"
    di "`wvar'" " " t`i' " " ct1_`i'

   }
  }
  
 
save ./femAllBlocks.dta, replace
clear 



**************** DIABETES *************************
 
 
 
use BENE_ID mxdiabetes_p_all ehat vmerger mxhypertension_p_all mxicd8_chronic_p_all using $dataIn\femLPManalysis2.dta
 *Drop these contradictions
 drop if mxhypertension_p_all==1 & mxicd8_chronic_p_all==0
*DIABETES
 keep if mxdiabetes_p_all ==1
  gen EHAT = ehat
  drop if EHAT ==.
*Diabetes 
  keep BENE_ID EHAT vmerger
  *Unique people
  egen maxvmerger = max(vmerger), by(BENE_ID)
  drop vmerger
  rename maxvmerger vmerger
  duplicates drop

*this is just a while variable.  
local wvar = 0 
*this is the primer. Imbens said most people have 5 blocks. We'll have many
local cen 20 40 60 80 

local looper = 0

*Doing while at least one t-stat is > 1.96. Currently just a loop. Change later
 while `wvar' < 1 { 
 local looper =`looper' + 1
 *Set to primer if first iteration 
 if `looper' ==1 {
 local cen `cen' 
 }
 *Set to split groups if relevant
 if `looper' > 1 {
 local cen `newcen'
 }

  local wc : word count `cen' 
  local wc = `wc' +1  

*Effectively drop these locals
  local newcen = .
  forval i=1(1)`wc' {
  local c_a`i' = .
  }

 *THE FOLLOWING LOOP LOOKS TO SEE IF THE BLOCK NEEDS TO BE SPLIT AT MEDIAN
 *Looping over current set of centiles. primer list has 4
   forval i=1(1)`wc' {
   local j = `i' -1
   blocking "`cen'" `wc' 
      
  *THIS gets the current and the previous cutoff values to take "median"
   *There is one more group than cutoff value. Dealing with that issue
   if `i' < `wc'  {
   local temp`i' `: word `i' of `cen''
   }
   *There is one more group than cutoff value. Dealing with that issue
   if `j' >=1  {
   local temp`j' `: word `j' of `cen''
   }

 *Finds the value that splits the first block if t-stat is big
 if `i' ==1 & abs(t`i') > 1.96 {
   local c_a`i' = (`temp`i'' ) / 2 
   di `c_a`i'' 
   }

  *Finds the value that splits all blocks other than first one if t-stat is big
  if `i' >1 & `i'<`wc' & abs(t`i') > 1.96 {
   local c_a`i' = (`temp`i'' - `temp`j'' ) / 2 + `temp`j'' 
   di `c_a`i''
   }

  *Finds the value that splits the last block if t-stat is big
   if `i'==`wc' & abs(t`i') > 1.96 {
   local c_a`i' = (100 - `temp`j'' ) / 2 + `temp`j'' 
   di `c_a`i''
   }

 *Creates the centile groups for construction of block
 *When we don't split a groups, the splitter becomes missing, need to remove it use the "not" local to take it out of the block name
 local not .
 local wvar = 1
 *This is a loop for all of the centiles
  forval k=1(1)`wc' {
 *The first centile
    if `k' ==1 {
	local temp`k' `: word `k' of `cen''
     local newcen `c_a`k'' `temp`k''
	 local newcen: list newcen- not
   }
*The middle centiles
   if `k' >1 & `k' != `wc' {
	local temp`k' `: word `k' of `cen''
    local newcen `newcen' `c_a`k'' `temp`k''
	 local newcen: list newcen- not
   }
*The last centile
    if `k' == `wc' {
    local newcen `newcen' `c_a`k'' 
	 local newcen: list newcen- not
   }
*Get the while variable
   local allblock = 1
   if abs(t`k') > 1.96 & ct1_`k' > 35 { 
     local allblock = 0
    }
	local wvar = `wvar' * `allblock'
   }
    di t`i' " " ct1_`i'
    di `c_a`i''
    di `temp`i'' " " `temp`j'' 
    di "`cen'"
    di "`newcen'"
    di "`wvar'" " " t`i' " " ct1_`i'

   }
  }
  
 
save ./femDiabBlocks.dta, replace
clear 
 
 **************** HYPERTENSION *************************
 
 
 
use BENE_ID vmerger mxicd8_chronic_p_all mxhypertension_p_all  ehat using $dataIn\femLPManalysis2.dta
 *Drop these contradictions
 drop if mxhypertension_p_all==1 & mxicd8_chronic_p_all==0
  keep if mxhypertension_p_all ==1
  gen EHAT =ehat 
  drop if EHAT ==.
*Hypertension
  keep BENE_ID EHAT vmerger
  *Unique people
  egen maxvmerger = max(vmerger), by(BENE_ID)
  drop vmerger
  rename maxvmerger vmerger
  duplicates drop

*this is just a while variable.  
local wvar = 0 
*this is the primer. Imbens said most people have 5 blocks. We'll have many
local cen 20 40 60 80 

local looper = 0

*Doing while at least one t-stat is > 1.96. Currently just a loop. Change later
 while `wvar' < 1 { 
 local looper =`looper' + 1
 *Set to primer if first iteration 
 if `looper' ==1 {
 local cen `cen' 
 }
 *Set to split groups if relevant
 if `looper' > 1 {
 local cen `newcen'
 }

  local wc : word count `cen' 
  local wc = `wc' +1  

*Effectively drop these locals
  local newcen = .
  forval i=1(1)`wc' {
  local c_a`i' = .
  }

 *THE FOLLOWING LOOP LOOKS TO SEE IF THE BLOCK NEEDS TO BE SPLIT AT MEDIAN
 *Looping over current set of centiles. primer list has 4
   forval i=1(1)`wc' {
   local j = `i' -1
   blocking "`cen'" `wc' 
      
  *THIS gets the current and the previous cutoff values to take "median"
   *There is one more group than there are cutoff values (this is necessarily true if think about it). Dealing with that issue
   if `i' < `wc'  {
   local temp`i' `: word `i' of `cen''
   }
   *There is one more group than cutoff value. Dealing with that issue
   if `j' >=1  {
   local temp`j' `: word `j' of `cen''
   }

 *Finds the value that splits the first block if t-stat is big
 if `i' ==1 & abs(t`i') > 1.96 {
   local c_a`i' = (`temp`i'' ) / 2 
   di `c_a`i'' 
   }

  *Finds the value that splits all blocks other than first and last if t-stat is big
  if `i' >1 & `i'<`wc' & abs(t`i') > 1.96 {
   local c_a`i' = (`temp`i'' - `temp`j'' ) / 2 + `temp`j'' 
   di `c_a`i''
   }

  *Finds the value that splits the last block if t-stat is big
   if `i'==`wc' & abs(t`i') > 1.96 {
   local c_a`i' = (100 - `temp`j'' ) / 2 + `temp`j'' 
   di `c_a`i''
   }

 *Creates the centile groups for construction of block
 *When we don't split a block, the splitter is a missing value and will need to remove it from local name, we use the "not" local to remove that missing value
 local not .
 local wvar = 1
 *This is a loop for all of the centiles
  forval k=1(1)`wc' {
 *The first centile
    if `k' ==1 {
	local temp`k' `: word `k' of `cen''
     local newcen `c_a`k'' `temp`k''
	 local newcen: list newcen- not
   }
*The middle centiles
   if `k' >1 & `k' != `wc' {
	local temp`k' `: word `k' of `cen''
    local newcen `newcen' `c_a`k'' `temp`k''
	 local newcen: list newcen- not
   }
*The last centile
    if `k' == `wc' {
    local newcen `newcen' `c_a`k'' 
	 local newcen: list newcen- not
   }
*Get the while variable
   local allblock = 1
   if abs(t`k') > 1.96 & ct1_`k' > 35 { 
     local allblock = 0
    }
	local wvar = `wvar' * `allblock'
   }
    di t`i' " " ct1_`i'
    di `c_a`i''
    di `temp`i'' " " `temp`j'' 
    di "`cen'"
    di "`newcen'"
    di "`wvar'" " " t`i' " " ct1_`i'

   }
  }
  
 
save ./femHypBlocks.dta, replace
clear
 
  